This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ readr 1.3.1
## ✔ tibble 2.1.3 ✔ purrr 0.3.2
## ✔ tidyr 1.0.0 ✔ stringr 1.4.0
## ✔ ggplot2 3.2.1 ✔ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(forcats)
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(knitr)
library(naniar)
library(broom)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ggpubr)
## Loading required package: magrittr
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
library(ggplot2)
library(ggthemes)
library(RColorBrewer)
library(dplyr)
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(tidyverse)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
library(xtable)
You can also embed plots, for example:
clean_water_quality_data <- readRDS("../../data/processed_data/clean_water_quality_data.rds")
#if you get an error - this happens because you need to save the file first cause it refers to a relative path
TABLES Lets start with positives by site….
library('tigerstats')
## Loading required package: abd
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## Loading required package: grid
## Loading required package: mosaic
## Loading required package: ggformula
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Registered S3 method overwritten by 'mosaic':
## method from
## fortify.SpatialPolygonsDataFrame ggplot2
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following object is masked from 'package:caret':
##
## dotPlot
## The following object is masked from 'package:plotly':
##
## do
## The following object is masked from 'package:ggthemes':
##
## theme_map
## The following object is masked from 'package:purrr':
##
## cross
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
## Welcome to tigerstats!
## To learn more about this package, consult its website:
## http://homerhanumat.github.io/tigerstats
library('formattable')
##
## Attaching package: 'formattable'
## The following object is masked from 'package:xtable':
##
## digits
## The following object is masked from 'package:plotly':
##
## style
library('gridExtra')
To do this we need to alter the data first
new_wq <- clean_water_quality_data
new_wq$arco_new <- new_wq$A_butzleri_HSP60
new_wq$HF183_new <- new_wq$HF183
unique(new_wq$arco_new)
## [1] 3.391993 3.693023 3.316096 4.368659 4.283681 3.299289 3.869818
## [8] 4.383025 4.112404 3.979002 4.172147 3.423574 4.964410 3.677424
## [15] 4.133411 3.940118 4.549077 4.807535 5.000522 3.671747 4.060388
## [22] 4.037148 4.281111 3.810609 4.207322 3.434345 3.757290 3.415140
## [29] 3.963948 4.335562 4.378710 4.374858 4.295717 4.364660 4.091597
## [36] 4.016699 3.541579 4.530891 4.453012 4.242939 4.130527 4.263731
## [43] 4.430978 3.953953 3.826593 3.887054 3.836197 3.646796 3.657820
## [50] 3.959709 3.493319 4.116608 3.835056 3.686636 3.871573 4.284372
## [57] 4.423092 3.734224 3.438257 4.123329 3.645029 3.767156 3.872972
## [64] 4.093912 4.655004 3.857694 3.466571 3.658965 3.422590 3.744293
## [71] 3.651472 3.697752 3.970440 3.316599 3.594393 3.497565 3.800703
## [78] 3.678300 3.379124 3.709049 3.296928 3.446537 4.766889 4.294025
## [85] 3.617000 3.491726 4.465680 3.309630 3.509740 4.040484 4.214505
## [92] 3.875686 3.273696 4.558310 3.627755 4.193125 3.952792 3.584263
## [99] 4.155221 4.166004
str(new_wq)
## 'data.frame': 533 obs. of 26 variables:
## $ Date : chr "May 9 2017" "May 9 2017" "May 9 2017" "May 9 2017" ...
## $ Date_Sampled : Date, format: "2017-05-09" "2017-05-09" ...
## $ Week : chr "Week 1" "Week 1" "Week 1" "Week 1" ...
## $ Pond : chr "Country_Hills" "Inverness" "Country_Hills" "Inverness" ...
## $ Sampling_Site : chr "Outfall WP31E" "Outfall_Inlet" "Outfall WP31C" "Outfall WP26B" ...
## $ Sample_ID : chr "29759" "29760" "29761" "29762" ...
## $ Rainfall : chr "0.2" "0.2" "0.2" "0.2" ...
## $ HF183 : num 3.33 3.33 3.33 3.33 3.33 ...
## $ HumM2 : num 3.85 3.85 3.85 3.85 3.85 ...
## $ CG01 : num 3.11 3.11 3.11 3.11 3.11 ...
## $ LeeSg : num 3.28 3.28 3.28 3.28 3.28 ...
## $ Dog3 : num 3.16 3.16 3.16 3.16 3.16 ...
## $ MuBac : num 3.55 3.55 3.55 3.55 3.55 ...
## $ Rum2Bac : num 3.46 3.46 3.46 3.46 3.46 ...
## $ A_butzleri_HSP60 : num 3.39 3.39 3.39 3.39 3.69 ...
## $ Salmonella_spp_InvA : num 3.17 3.17 3.17 3.17 3.17 ...
## $ Campylobacter_spp_Van_Dkye: num 3.55 3.55 3.55 3.55 3.55 ...
## $ Enterococcus_CCE : num 2.12 2.06 2.36 2.25 2.09 ...
## $ Shigatoxin_1 : chr "negative" "NA" "negative" "NA" ...
## $ Shigatoxin_2 : chr "negative" "NA" "negative" "NA" ...
## $ total_coliforms : num 2.64 1.79 2.54 1.46 2.44 ...
## $ E_coli : num 0.602 0 1.362 0 0 ...
## $ Enteroalert : chr "0" "0" "1" "0.301029995663981" ...
## $ thermotolerant_coliforms : num 0.954 0.954 1 0.954 0.954 ...
## $ arco_new : num 3.39 3.39 3.39 3.39 3.69 ...
## $ HF183_new : num 3.33 3.33 3.33 3.33 3.33 ...
#convert characters to categorical varaibles
new_wq$Pond <- as.factor(as.character(new_wq$Pond))
new_wq$Sampling_Site <- as.factor(as.character(new_wq$Sampling_Site))
new_wq$Sampling_Site <- as.factor(as.character(new_wq$Sampling_Site))
str(new_wq)
## 'data.frame': 533 obs. of 26 variables:
## $ Date : chr "May 9 2017" "May 9 2017" "May 9 2017" "May 9 2017" ...
## $ Date_Sampled : Date, format: "2017-05-09" "2017-05-09" ...
## $ Week : chr "Week 1" "Week 1" "Week 1" "Week 1" ...
## $ Pond : Factor w/ 3 levels "Country_Hills",..: 1 2 1 2 1 3 1 3 3 3 ...
## $ Sampling_Site : Factor w/ 13 levels "Inlet 3/4","Inlet PR60",..: 12 13 10 6 3 2 9 4 1 5 ...
## $ Sample_ID : chr "29759" "29760" "29761" "29762" ...
## $ Rainfall : chr "0.2" "0.2" "0.2" "0.2" ...
## $ HF183 : num 3.33 3.33 3.33 3.33 3.33 ...
## $ HumM2 : num 3.85 3.85 3.85 3.85 3.85 ...
## $ CG01 : num 3.11 3.11 3.11 3.11 3.11 ...
## $ LeeSg : num 3.28 3.28 3.28 3.28 3.28 ...
## $ Dog3 : num 3.16 3.16 3.16 3.16 3.16 ...
## $ MuBac : num 3.55 3.55 3.55 3.55 3.55 ...
## $ Rum2Bac : num 3.46 3.46 3.46 3.46 3.46 ...
## $ A_butzleri_HSP60 : num 3.39 3.39 3.39 3.39 3.69 ...
## $ Salmonella_spp_InvA : num 3.17 3.17 3.17 3.17 3.17 ...
## $ Campylobacter_spp_Van_Dkye: num 3.55 3.55 3.55 3.55 3.55 ...
## $ Enterococcus_CCE : num 2.12 2.06 2.36 2.25 2.09 ...
## $ Shigatoxin_1 : chr "negative" "NA" "negative" "NA" ...
## $ Shigatoxin_2 : chr "negative" "NA" "negative" "NA" ...
## $ total_coliforms : num 2.64 1.79 2.54 1.46 2.44 ...
## $ E_coli : num 0.602 0 1.362 0 0 ...
## $ Enteroalert : chr "0" "0" "1" "0.301029995663981" ...
## $ thermotolerant_coliforms : num 0.954 0.954 1 0.954 0.954 ...
## $ arco_new : num 3.39 3.39 3.39 3.39 3.69 ...
## $ HF183_new : num 3.33 3.33 3.33 3.33 3.33 ...
#want to convert a new column with acro as yes or no to get the percent of the samples positive
#lets do arco first
new_wq <- new_wq %>% mutate(arco_y_n=
ifelse(arco_new < "3.391993", "no",
ifelse(arco_new > "3.391994", "yes",
"no")))
#Now HF183 and the rest
new_wq <- new_wq %>% mutate(HF183_y_n=
ifelse(HF183_new < "3.329601", "no",
ifelse(HF183_new > "3.329602", "yes",
"no")))
new_wq <- new_wq %>% mutate(HumM2_y_n=
ifelse(HumM2 < "3.847449", "no",
ifelse(HumM2 > "3.847450", "yes",
"no")))
new_wq <- new_wq %>% mutate(CG01_y_n=
ifelse(CG01 < "3.108565", "no",
ifelse(CG01 > "3.108566", "yes",
"no")))
new_wq <- new_wq %>% mutate(LeeSg_y_n=
ifelse(LeeSg < "3.283979", "no",
ifelse(LeeSg > "3.283980", "yes",
"no")))
new_wq <- new_wq %>% mutate(Dog3_y_n=
ifelse(Dog3 < "3.156549", "no",
ifelse(Dog3 > "3.156550", "yes",
"no")))
new_wq <- new_wq %>% mutate(MuBac_y_n=
ifelse(MuBac < "3.554852", "no",
ifelse(MuBac > "3.554853", "yes",
"no")))
new_wq <- new_wq %>% mutate(Rum2Bac_y_n=
ifelse(Rum2Bac < "3.457125", "no",
ifelse(Rum2Bac > "3.457126", "yes",
"no")))
new_wq <- new_wq %>% mutate(Salmonella_y_n=
ifelse(Salmonella_spp_InvA < "3.170848", "no",
ifelse(Salmonella_spp_InvA > "3.170849", "yes",
"no")))
new_wq <- new_wq %>% mutate(Campy_y_n=
ifelse(Campylobacter_spp_Van_Dkye < "3.554852", "no",
ifelse(Campylobacter_spp_Van_Dkye > "3.554853", "yes",
"no")))
#water quality indicators
new_wq <- new_wq %>% mutate(Entero_y_n=
ifelse(Enterococcus_CCE < "3.102", "no",
ifelse(Enterococcus_CCE > "3.103", "yes",
"no")))
new_wq <- new_wq %>% mutate(thermo_y_n=
ifelse(thermotolerant_coliforms < "2.602", "no",
ifelse(thermotolerant_coliforms > "2.603", "yes",
"no")))
xtabs(~Pond+Entero_y_n, data=new_wq)
## Entero_y_n
## Pond no yes
## Country_Hills 180 25
## Inverness 143 21
## McCall_Lake 116 46
Entero_Pond<- rowPerc(xtabs(~Pond+Entero_y_n, data=new_wq))
png("../../results/Entero_pond_table.png", height=400, width=400)
p<-tableGrob(Entero_Pond)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
xtabs(~Pond+thermo_y_n, data=new_wq)
## thermo_y_n
## Pond no yes
## Country_Hills 185 20
## Inverness 160 3
## McCall_Lake 133 31
thermo_pond<- rowPerc(xtabs(~Pond+thermo_y_n, data=new_wq))
png("../../results/thermo_pond_table.png", height=400, width=400)
p<-tableGrob(thermo_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Arco positives by site
xtabs(~Pond+arco_y_n, data=new_wq)
## arco_y_n
## Pond no yes
## Country_Hills 166 39
## Inverness 124 40
## McCall_Lake 104 60
Arco_Pond<- rowPerc(xtabs(~Pond+arco_y_n, data=new_wq))
png("../../results/Arco_pond_table.png", height=400, width=400)
p<-tableGrob(Arco_Pond)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
Arco_Pond %>%
kable() %>%
kable_styling(bootstrap_options = c("striped", "hover")) %>%
save_kable(file = "../../results/Arco_Pond_table_4.png")
#kable_as_image("../../results/test3")
#kableExtra::save_kable(Arco_Pond, file = "table.pdf")
##save_kable(file = "../../results/Arco_Pond_table_4.png")
##save(Arco_Pond, file = "../../results/Arco_Pond_table_2.RData")
#print(xtable(head(Arco_Pond_2)), type="html")
library(xtable)
print(xtable(Arco_Pond), type='html')
| no | yes | Total | |
|---|---|---|---|
| Country_Hills | 80.98 | 19.02 | 100.00 |
| Inverness | 75.61 | 24.39 | 100.00 |
| McCall_Lake | 63.41 | 36.59 | 100.00 |
xtabs(~Pond+HF183_y_n, data=new_wq)
## HF183_y_n
## Pond no yes
## Country_Hills 150 55
## Inverness 140 24
## McCall_Lake 94 70
HF183_y_n_pond<- rowPerc(xtabs(~Pond+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
xtabs(~Pond+HumM2_y_n, data=new_wq)
## HumM2_y_n
## Pond no yes
## Country_Hills 190 15
## Inverness 159 5
## McCall_Lake 138 26
HumM2_y_n_pond<- rowPerc(xtabs(~Pond+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_pond)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Campy positives by site
xtabs(~Sampling_Site+Campy_y_n, data=new_wq)
## Campy_y_n
## Sampling_Site no yes
## Inlet 3/4 38 3
## Inlet PR60 39 2
## Inlet WP31B 42 0
## Outfall ML1 38 3
## Outfall ML2 38 3
## Outfall WP26B 38 0
## Outfall WP26C 39 0
## Outfall WP26D 45 1
## Outfall WP31A 40 1
## Outfall WP31C 38 3
## Outfall WP31D 37 4
## Outfall WP31E 39 1
## Outfall_Inlet 40 1
campy_site<- rowPerc(xtabs(~Sampling_Site+Campy_y_n, data=new_wq))
png("../../results/campy_site_table.png", height=400, width=400)
p<-tableGrob(campy_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Arco positives by site
xtabs(~Sampling_Site+arco_y_n, data=new_wq)
## arco_y_n
## Sampling_Site no yes
## Inlet 3/4 29 12
## Inlet PR60 24 17
## Inlet WP31B 30 12
## Outfall ML1 27 14
## Outfall ML2 24 17
## Outfall WP26B 32 6
## Outfall WP26C 35 4
## Outfall WP26D 39 7
## Outfall WP31A 36 5
## Outfall WP31C 34 7
## Outfall WP31D 36 5
## Outfall WP31E 30 10
## Outfall_Inlet 18 23
arco_site<- rowPerc(xtabs(~Sampling_Site+arco_y_n, data=new_wq))
png("../../results/arco_site_table.png", height=400, width=400)
p<-tableGrob(arco_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#save as df
save(arco_site, file = "../../results/saved_arco_site.RData")
#Salmonella positives by site
xtabs(~Sampling_Site+Salmonella_y_n, data=new_wq)
## Salmonella_y_n
## Sampling_Site no yes
## Inlet 3/4 41 0
## Inlet PR60 41 0
## Inlet WP31B 42 0
## Outfall ML1 41 0
## Outfall ML2 39 2
## Outfall WP26B 38 0
## Outfall WP26C 39 0
## Outfall WP26D 46 0
## Outfall WP31A 41 0
## Outfall WP31C 41 0
## Outfall WP31D 40 1
## Outfall WP31E 40 0
## Outfall_Inlet 40 1
Salmonella_site<- rowPerc(xtabs(~Sampling_Site+Salmonella_y_n, data=new_wq))
png("../../results/salmonella_site_table.png", height=400, width=400)
p<-tableGrob(Salmonella_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#HF183 positives by site
xtabs(~Pond+HF183_y_n, data=new_wq)
## HF183_y_n
## Pond no yes
## Country_Hills 150 55
## Inverness 140 24
## McCall_Lake 94 70
HF183_y_n_site<- rowPerc(xtabs(~Pond+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#HumM2
xtabs(~Pond+HumM2_y_n, data=new_wq)
## HumM2_y_n
## Pond no yes
## Country_Hills 190 15
## Inverness 159 5
## McCall_Lake 138 26
HumM2_y_n_site<- rowPerc(xtabs(~Pond+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#CG01 positives by site
xtabs(~Pond+CG01_y_n, data=new_wq)
## CG01_y_n
## Pond no yes
## Country_Hills 202 3
## Inverness 162 2
## McCall_Lake 157 7
CG01_y_n_site<- rowPerc(xtabs(~Pond+CG01_y_n, data=new_wq))
png("../../results/CG01_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(CG01_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Dog3 positives by site
xtabs(~Pond+Dog3_y_n, data=new_wq)
## Dog3_y_n
## Pond no yes
## Country_Hills 198 7
## Inverness 163 1
## McCall_Lake 159 5
Dog3_y_n_site<- rowPerc(xtabs(~Pond+Dog3_y_n, data=new_wq))
png("../../results/Dog3_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(Dog3_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Rum2Bac positives by site
xtabs(~Pond+Rum2Bac_y_n, data=new_wq)
## Rum2Bac_y_n
## Pond no yes
## Country_Hills 200 5
## Inverness 163 1
## McCall_Lake 161 3
Rum2Bac_y_n_site<- rowPerc(xtabs(~Pond+Rum2Bac_y_n, data=new_wq))
png("../../results/Rum2Bac_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(Rum2Bac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#LeeSg positives by site
xtabs(~Pond+LeeSg_y_n, data=new_wq)
## LeeSg_y_n
## Pond no yes
## Country_Hills 185 20
## Inverness 157 7
## McCall_Lake 139 25
LeeSg_y_n_site<- rowPerc(xtabs(~Pond+LeeSg_y_n, data=new_wq))
png("../../results/LeeSg_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(LeeSg_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#MuBac positives by site
xtabs(~Pond+MuBac_y_n, data=new_wq)
## MuBac_y_n
## Pond no yes
## Country_Hills 202 3
## Inverness 163 1
## McCall_Lake 163 1
MuBac_y_n_site<- rowPerc(xtabs(~Pond+MuBac_y_n, data=new_wq))
png("../../results/MuBac_y_n_pond_table.png", height=400, width=400)
p<-tableGrob(MuBac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#HF183 positives by site
xtabs(~Sampling_Site+HF183_y_n, data=new_wq)
## HF183_y_n
## Sampling_Site no yes
## Inlet 3/4 34 7
## Inlet PR60 27 14
## Inlet WP31B 31 11
## Outfall ML1 30 11
## Outfall ML2 3 38
## Outfall WP26B 34 4
## Outfall WP26C 31 8
## Outfall WP26D 41 5
## Outfall WP31A 37 4
## Outfall WP31C 30 11
## Outfall WP31D 24 17
## Outfall WP31E 28 12
## Outfall_Inlet 34 7
HF183_y_n_site<- rowPerc(xtabs(~Sampling_Site+HF183_y_n, data=new_wq))
png("../../results/HF183_y_n_site_table.png", height=400, width=400)
p<-tableGrob(HF183_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#HumM2 positives by site
xtabs(~Sampling_Site+HumM2_y_n, data=new_wq)
## HumM2_y_n
## Sampling_Site no yes
## Inlet 3/4 39 2
## Inlet PR60 39 2
## Inlet WP31B 42 0
## Outfall ML1 37 4
## Outfall ML2 23 18
## Outfall WP26B 37 1
## Outfall WP26C 37 2
## Outfall WP26D 45 1
## Outfall WP31A 40 1
## Outfall WP31C 38 3
## Outfall WP31D 34 7
## Outfall WP31E 36 4
## Outfall_Inlet 40 1
HumM2_y_n_site<- rowPerc(xtabs(~Sampling_Site+HumM2_y_n, data=new_wq))
png("../../results/HumM2_y_n_site_table.png", height=400, width=400)
p<-tableGrob(HumM2_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#CG01 positives by site
xtabs(~Sampling_Site+CG01_y_n, data=new_wq)
## CG01_y_n
## Sampling_Site no yes
## Inlet 3/4 41 0
## Inlet PR60 40 1
## Inlet WP31B 41 1
## Outfall ML1 37 4
## Outfall ML2 39 2
## Outfall WP26B 37 1
## Outfall WP26C 39 0
## Outfall WP26D 46 0
## Outfall WP31A 41 0
## Outfall WP31C 40 1
## Outfall WP31D 41 0
## Outfall WP31E 39 1
## Outfall_Inlet 40 1
CG01_y_n_site<- rowPerc(xtabs(~Sampling_Site+CG01_y_n, data=new_wq))
png("../../results/CG01_y_n_site_table.png", height=400, width=400)
p<-tableGrob(CG01_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Dog3 positives by site
xtabs(~Sampling_Site+Dog3_y_n, data=new_wq)
## Dog3_y_n
## Sampling_Site no yes
## Inlet 3/4 40 1
## Inlet PR60 40 1
## Inlet WP31B 42 0
## Outfall ML1 41 0
## Outfall ML2 38 3
## Outfall WP26B 37 1
## Outfall WP26C 39 0
## Outfall WP26D 46 0
## Outfall WP31A 39 2
## Outfall WP31C 38 3
## Outfall WP31D 39 2
## Outfall WP31E 40 0
## Outfall_Inlet 41 0
Dog3_y_n_site<- rowPerc(xtabs(~Sampling_Site+Dog3_y_n, data=new_wq))
png("../../results/Dog3_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Dog3_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Rum2Bac positives by site
xtabs(~Sampling_Site+Rum2Bac_y_n, data=new_wq)
## Rum2Bac_y_n
## Sampling_Site no yes
## Inlet 3/4 41 0
## Inlet PR60 41 0
## Inlet WP31B 40 2
## Outfall ML1 39 2
## Outfall ML2 40 1
## Outfall WP26B 38 0
## Outfall WP26C 39 0
## Outfall WP26D 45 1
## Outfall WP31A 41 0
## Outfall WP31C 41 0
## Outfall WP31D 38 3
## Outfall WP31E 40 0
## Outfall_Inlet 41 0
Rum2Bac_y_n_site<- rowPerc(xtabs(~Sampling_Site+Rum2Bac_y_n, data=new_wq))
png("../../results/Rum2Bac_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Rum2Bac_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#LeeSg positives by site
xtabs(~Sampling_Site+LeeSg_y_n, data=new_wq)
## LeeSg_y_n
## Sampling_Site no yes
## Inlet 3/4 38 3
## Inlet PR60 35 6
## Inlet WP31B 40 2
## Outfall ML1 34 7
## Outfall ML2 32 9
## Outfall WP26B 35 3
## Outfall WP26C 39 0
## Outfall WP26D 44 2
## Outfall WP31A 39 2
## Outfall WP31C 34 7
## Outfall WP31D 36 5
## Outfall WP31E 36 4
## Outfall_Inlet 39 2
LeeSg_y_n_site<- rowPerc(xtabs(~Sampling_Site+LeeSg_y_n, data=new_wq))
png("../../results/LeeSg_y_n_site_table.png", height=400, width=400)
p<-tableGrob(LeeSg_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Entero by site
xtabs(~Sampling_Site+Entero_y_n, data=new_wq)
## Entero_y_n
## Sampling_Site no yes
## Inlet 3/4 34 6
## Inlet PR60 28 12
## Inlet WP31B 38 4
## Outfall ML1 34 7
## Outfall ML2 20 21
## Outfall WP26B 38 0
## Outfall WP26C 34 5
## Outfall WP26D 40 6
## Outfall WP31A 40 1
## Outfall WP31C 31 10
## Outfall WP31D 35 6
## Outfall WP31E 36 4
## Outfall_Inlet 31 10
Entero_y_n_site<- rowPerc(xtabs(~Sampling_Site+Entero_y_n, data=new_wq))
png("../../results/Entero_y_n_site_table.png", height=400, width=400)
p<-tableGrob(Entero_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#Thermotoler coliforms by site
xtabs(~Sampling_Site+thermo_y_n, data=new_wq)
## thermo_y_n
## Sampling_Site no yes
## Inlet 3/4 38 3
## Inlet PR60 36 5
## Inlet WP31B 42 0
## Outfall ML1 36 5
## Outfall ML2 23 18
## Outfall WP26B 37 1
## Outfall WP26C 38 1
## Outfall WP26D 46 0
## Outfall WP31A 41 0
## Outfall WP31C 31 10
## Outfall WP31D 36 5
## Outfall WP31E 35 5
## Outfall_Inlet 39 1
thermo_y_n_site<- rowPerc(xtabs(~Sampling_Site+thermo_y_n, data=new_wq))
png("../../results/thermo_y_n_site_table.png", height=400, width=400)
p<-tableGrob(thermo_y_n_site)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
LINE GRAPHS Data: clean_data First lets look at line graphs by sampling site.
library('gridExtra')
library('ggplot2')
#it stays the same but i want to shorten the name
clean_data <- clean_water_quality_data
ML2 <- subset(clean_data, Sampling_Site == "Outfall ML2")
ML1 <- subset(clean_data, Sampling_Site == "Outfall ML1")
PR60 <- subset(clean_data, Sampling_Site == "Inlet PR60")
Inlet_34 <- subset(clean_data, Sampling_Site == "Inlet 3/4")
McCall_Lake <- subset(clean_data, Pond == "McCall_Lake")
Inverness <- subset(clean_data, Pond == "Inverness")
Country_Hills <- subset(clean_data, Pond == "Country_Hills")
#Need to fix as some of it is getting cut off
#want to add arrows pointing at holidays (https://www.r-bloggers.com/point-arrows-to-specific-parts-of-the-data-2/)
#tried this: geom_segment(aes(x = Date_Sampled, y = 5.5, xend = Date_Sampled, yend = 4.5),
#arrow = arrow(length = unit(0.5, "cm")))
HF183_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6.3) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "McCall Sampling Sites") +
annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
HF183_at_McCall
ggsave(filename = "../../results/HF183_at_McCall.png",plot = HF183_at_McCall)
## Saving 7 x 5 in image
HF183_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6.3) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Country Hills Sampling Sites") +
annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
HF183_at_Country_Hills
ggsave(filename = "../../results/HF183_at_Country_Hills.png",plot = HF183_at_Country_Hills)
## Saving 7 x 5 in image
HF183_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = HF183, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.67, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6.3) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Inverness Sampling Sites") +
annotate("segment", x= as.Date("2017-07-03"), xend = as.Date("2017-07-03"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-22"), xend = as.Date("2017-05-22"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-08-07"), xend = as.Date("2017-08-07"), y = 6, yend = 5, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-03"), xend = as.Date("2017-09-03"), y = 6, yend = 5, arrow = arrow(), size = 1)
HF183_at_Inverness
ggsave(filename = "../../results/HF183_at_Inverness.png",plot = HF183_at_Inverness)
## Saving 7 x 5 in image
HF183_all_arranged <- grid.arrange(HF183_at_McCall, HF183_at_Country_Hills, HF183_at_Inverness)
ggsave(filename = "../../results/HF183_all_arranged.png",plot = HF183_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off
Arco_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "McCall Sampling Sites") +
annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) + annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1)
Arco_at_McCall
ggsave(filename = "../../results/Arco_at_McCall.png",plot = Arco_at_McCall)
## Saving 7 x 5 in image
Arco_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Country Hills Sampling Sites")
Arco_at_Country_Hills
ggsave(filename = "../../results/Arco_at_Country_Hills.png",plot = Arco_at_Country_Hills)
## Saving 7 x 5 in image
Arco_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Inverness Sampling Sites")
Arco_at_Inverness
ggsave(filename = "../../results/Arco_at_Inverness.png",plot = Arco_at_Inverness)
## Saving 7 x 5 in image
Arco_all_arranged <- grid.arrange(Arco_at_Country_Hills, Arco_at_Inverness, Arco_at_McCall)
ggsave(filename = "../../results/Arco_all_arranged.png",plot = Arco_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off
Arco_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "McCall Sampling Sites") +
annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) + annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5.2, yend = 4.7, arrow = arrow(), size = 1)
Arco_at_McCall
ggsave(filename = "../../results/Arco_at_McCall.png",plot = Arco_at_McCall)
## Saving 7 x 5 in image
Arco_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Country Hills Sampling Sites")
Arco_at_Country_Hills
ggsave(filename = "../../results/Arco_at_Country_Hills.png",plot = Arco_at_Country_Hills)
## Saving 7 x 5 in image
Arco_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = A_butzleri_HSP60, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.69, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 5.2) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Inverness Sampling Sites")
Arco_at_Inverness
ggsave(filename = "../../results/Arco_at_Inverness.png",plot = Arco_at_Inverness)
## Saving 7 x 5 in image
Arco_all_arranged <- grid.arrange(Arco_at_Country_Hills, Arco_at_Inverness, Arco_at_McCall)
ggsave(filename = "../../results/Arco_all_arranged.png",plot = Arco_all_arranged)
## Saving 7 x 5 in image
#Need to fix as some of it is getting cut off
HumM2_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "McCall Sampling Sites")
HumM2_at_McCall
ggsave(filename = "../../results/HumM2_at_McCall.png",plot = HumM2_at_McCall)
## Saving 7 x 5 in image
HumM2_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Country Hills Sampling Sites")
HumM2_at_Country_Hills
ggsave(filename = "../../results/HumM2_at_Country_Hills.png",plot = HumM2_at_Country_Hills)
## Saving 7 x 5 in image
HumM2_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = HumM2, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 4.14, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(3.2, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Inverness Sampling Sites")
HumM2_at_Inverness
ggsave(filename = "../../results/HumM2_at_Inverness.png",plot = HumM2_at_Inverness)
## Saving 7 x 5 in image
HumM2_all_arranged <- grid.arrange(HumM2_at_Country_Hills, HumM2_at_Inverness, HumM2_at_McCall)
ggsave(filename = "../../results/HumM2_all_arranged.png",plot = HumM2_all_arranged)
## Saving 7 x 5 in image
CGO1_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "McCall Sampling Sites")
CGO1_at_McCall
ggsave(filename = "../../results/CGO1_at_McCall.png",plot = CGO1_at_McCall)
## Saving 7 x 5 in image
CG01_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Country Hills Sampling Sites")
CG01_at_Country_Hills
ggsave(filename = "../../results/_at_Country_Hills.png",plot = CG01_at_Country_Hills)
## Saving 7 x 5 in image
CG01_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = CG01, color = Sampling_Site)) + geom_line() +
geom_point() +
geom_hline(yintercept = 3.4, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 6) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 4), legend.title=element_text(size=5)) +
labs(color = "Inverness Sampling Sites")
CG01_at_Inverness
ggsave(filename = "../../results/CG01_at_Inverness.png",plot = CG01_at_Inverness)
## Saving 7 x 5 in image
HumM2_all_arranged <- grid.arrange(CG01_at_Country_Hills, CG01_at_Inverness, CGO1_at_McCall)
ggsave(filename = "../../results/HumM2_all_arranged.png",plot = HumM2_all_arranged)
## Saving 7 x 5 in image
Seagull_at_McCall <- ggplot(McCall_Lake, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) +
geom_line() +
geom_point() +
geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 5) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
labs(color = "McCall Sampling Sites") +
annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
Seagull_at_McCall
ggsave(filename = "../../results/LeeSg_at_McCall.png",plot = Seagull_at_McCall)
## Saving 7 x 5 in image
Seagull_at_Inverness <- ggplot(Inverness, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) +
geom_line() +
geom_point() +
geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 5) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
labs(color = "Inverness Sampling Sites") +
annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
Seagull_at_Inverness
ggsave(filename = "../../results/LeeSg_at_Inverness.png",plot = Seagull_at_Inverness)
## Saving 7 x 5 in image
Seagull_at_Country_Hills <- ggplot(Country_Hills, aes(x = Date_Sampled, y = LeeSg, color = Sampling_Site)) +
geom_line() +
geom_point() +
geom_hline(yintercept = 3.28, colour = "black", linetype = "dotted") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw() +
ylim(2.5, 5) +
theme(axis.title.x = element_text( size = 7), axis.title.y = element_text( size = 7), legend.text = element_text( size = 7), legend.title=element_text(size=7)) +
labs(color = "Country Hills Sampling Sites") +
annotate("segment", x= as.Date("2017-06-08"), xend = as.Date("2017-06-08"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-05-25"), xend = as.Date("2017-05-25"), y = 5, yend = 4.7, arrow = arrow(), size = 1) +
annotate("segment", x= as.Date("2017-09-13"), xend = as.Date("2017-09-13"), y = 5, yend = 4.7, arrow = arrow(), size = 1)
Seagull_at_Country_Hills
ggsave(filename = "../../results/LeeSg_at_Country_Hills.png",plot = Seagull_at_Country_Hills)
## Saving 7 x 5 in image
Inverness_bird_Arco <- ggplot(Inverness) +
geom_line(mapping = aes(x = Date_Sampled, y = CG01), color = "dark green") +
geom_line(mapping = aes(x = Date_Sampled, y = LeeSg), color = "purple") +
geom_line(mapping = aes(x = Date_Sampled, y = A_butzleri_HSP60), color = "navy") +
geom_hline(yintercept = 3.4, colour = "dark green", linetype = "dotted") +
geom_hline(yintercept = 3.58, colour = "purple", linetype = "dotted") +
geom_hline(yintercept = 3.69, colour = "navy", linetype = "dotted") +
geom_point(mapping = aes(x = Date_Sampled, y = A_butzleri_HSP60), color = "navy") +
geom_point(mapping = aes(x = Date_Sampled, y = CG01), color = "dark green") +
geom_point(mapping = aes(x = Date_Sampled, y = LeeSg), color = "purple") +
labs (x = "Date Sampled", y = "Log10 Copies/100mL") +
theme_bw()
Inverness_bird_Arco
RASTER PLOTS
raster_by_pond_HF183 <- ggplot(clean_data, aes(x = Date_Sampled, y = Pond, fill = HF183)) + geom_raster(interpolate = TRUE) + scale_fill_gradientn(colors = c("blue", "yellow", "red"))
raster_by_pond_HF183
## Warning in f(...): Raster pixels are placed at uneven horizontal intervals
## and will be shifted. Consider using geom_tile() instead.
raster_by_pond_Entero <- ggplot(clean_data, aes(x = Date_Sampled, y = Pond, fill = Enterococcus_CCE)) + geom_raster(interpolate = TRUE) + scale_fill_gradientn(colors = c("blue", "yellow", "purple"))
raster_by_pond_Entero
## Warning in f(...): Raster pixels are placed at uneven horizontal intervals
## and will be shifted. Consider using geom_tile() instead.
HEAT MAP
#heatmap <- heatmap(clean_data, aes(x = , y = pond)) + geom_tile(aes(fill = HF183))
COOCCURANCE PLOTS AND FIGURES
#Two way tables
table_arco_human <- table(new_wq$arco_y_n, new_wq$HF183_y_n)
table_arco_human
##
## no yes
## no 298 96
## yes 86 53
#shows the numbers
margin.table(table_arco_human)
## [1] 533
margin.table(table_arco_human, 1)
##
## no yes
## 394 139
margin.table(table_arco_human, 2)
##
## no yes
## 384 149
#gets the proportions (percents)
table_arco_human/margin.table(table_arco_human)
##
## no yes
## no 0.55909944 0.18011257
## yes 0.16135084 0.09943715
prop.table(table_arco_human) #prop.table is a command that will convert any table to proportions
##
## no yes
## no 0.55909944 0.18011257
## yes 0.16135084 0.09943715
#the yes-yes column in this table gives us the total number of samples positive for both variables out of all samples
#mosiac plot of table
mosaicplot(table_arco_human, shade = TRUE, main= "Two-way table of samples that are positive for Arcobacter butzleri and HF183", xlab = "Arcobacter butzlier", ylab = "HF183")
#instruction from: https://www.cyclismo.org/tutorial/R/tables.html
mosaicplot(table_arco_human, sort=c(2,1))
library(vcd)
##
## Attaching package: 'vcd'
## The following object is masked from 'package:mosaic':
##
## mplot
#vdc::mosaicplot()
#assoc_1 <- ggplot(data = clean_data) +
#geom_mosaic(aes(x = A_butzleri_HSP60_A, fill=HF183), divider="hbar", na.rm=TRUE) + labs(x=" ", title='divider = "hbar"')
CORRELATION
library(corrplot)
## corrplot 0.84 loaded
library(ggplot2)
corr_subset <- clean_data[c(8,9,10,11)]
corr_matrix_test <- cor(corr_subset)
corrplot(corr_matrix_test, method = "square")
#correlation with HF183, HumM2 and Arco
corr_subset <- clean_data[c(8,9,10,11,15)]
corr_matrix_arco_human <- cor(corr_subset)
corr_matrix_arco_human_plot <- corrplot(corr_matrix_arco_human, method = "square")
corr_matrix_arco_human_plot
## HF183 HumM2 CG01 LeeSg
## HF183 1.0000000000 0.28689668 -0.0005077393 0.18360616
## HumM2 0.2868966805 1.00000000 -0.0299054267 0.13117928
## CG01 -0.0005077393 -0.02990543 1.0000000000 -0.01105322
## LeeSg 0.1836061565 0.13117928 -0.0110532178 1.00000000
## A_butzleri_HSP60 0.0996056716 0.09316562 0.0625449999 0.16267569
## A_butzleri_HSP60
## HF183 0.09960567
## HumM2 0.09316562
## CG01 0.06254500
## LeeSg 0.16267569
## A_butzleri_HSP60 1.00000000
png(height=1800, width=1800, file="../../results/corrplot_arco_human_bird.png")
corr_plot = corrplot(corr_matrix_arco_human_plot, method = "square", tl.cex = 5)
dev.off()
## quartz_off_screen
## 2
#Save it as a table
#png("../../results/corr_matrix_arco_human).png", height=400, width=400)
#p<-tableGrob(corr_matrix_arco_human_plot)
#grid.arrange(p)
#dev.off()
#png("../../results/corr_matrix_arco_human.png", height=400, width=400)
#p<-grob(corr_matrix_arco_human_plot)
#dev.off()
d_cor = clean_data %>% select(A_butzleri_HSP60, HF183, HumM2) %>% cor()
png(height=1800, width=1800, file="../../results/corr_plot_arco_human.png")
corr_plot = corrplot(d_cor, method="color", tl.cex = 5)
dev.off()
## quartz_off_screen
## 2
TST
#clean_data %>%
#t.test(A_butzleri_HSP60, HF183)
ks.test(McCall_Lake$HF183, McCall_Lake$HumM2)
## Warning in ks.test(McCall_Lake$HF183, McCall_Lake$HumM2): p-value will be
## approximate in the presence of ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: McCall_Lake$HF183 and McCall_Lake$HumM2
## D = 0.75, p-value < 2.2e-16
## alternative hypothesis: two-sided
ks.test(McCall_Lake$HF183, McCall_Lake$A_butzleri_HSP60)
## Warning in ks.test(McCall_Lake$HF183, McCall_Lake$A_butzleri_HSP60): p-
## value will be approximate in the presence of ties
##
## Two-sample Kolmogorov-Smirnov test
##
## data: McCall_Lake$HF183 and McCall_Lake$A_butzleri_HSP60
## D = 0.57927, p-value < 2.2e-16
## alternative hypothesis: two-sided
t.test(McCall_Lake$HF183, McCall_Lake$HumM2)
##
## Welch Two Sample t-test
##
## data: McCall_Lake$HF183 and McCall_Lake$HumM2
## t = -5.8662, df = 200.04, p-value = 1.822e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.3290068 -0.1634653
## sample estimates:
## mean of x mean of y
## 3.643604 3.889840
#significant means you fail the null
#the null means they are equal to each other
# p value less than .05 means you reject the null
#these all reject the null are they are worng
D represents the value of the test-statistic (difference), so the KS-statistic and the p-value represents the likelihood of observing this particular value of D, or a “more extreme” value by pure chance.
LINEAR MODEL - SIMPLE Linear models describe a continuous response variable as a function of one or more predictor variables. They can help you understand and predict the behavior of complex systems or analyze experimental, financial, and biological data.
We want to see if other biological inputs can predict the presence of A. butzleri.
#http://r-statistics.co/Linear-Regression.html
linearMod_HF183 <- lm(A_butzleri_HSP60 ~ HF183, data = clean_data)
print(linearMod_HF183)
##
## Call:
## lm(formula = A_butzleri_HSP60 ~ HF183, data = clean_data)
##
## Coefficients:
## (Intercept) HF183
## 3.25313 0.07656
modelSummary <- summary(linearMod_HF183)
linearMod_HumM2 <- lm(A_butzleri_HSP60 ~ HumM2, data = clean_data)
linearMod_LeeSg <- lm(A_butzleri_HSP60 ~ LeeSg, data = clean_data)
linearMod_CG01 <- lm(A_butzleri_HSP60 ~ CG01, data = clean_data)
lm_log_HF183 = broom::tidy(linearMod_HF183)
lm_log_HuMm2 = broom::tidy(linearMod_HumM2)
lm_log_LeeSg = broom::tidy(linearMod_LeeSg)
lm_log_CG01 = broom::tidy(linearMod_CG01)
lm_variables = bind_rows(lm_log_HF183, lm_log_HuMm2, lm_log_LeeSg, lm_log_CG01)
lm_variables = lm_variables %>% filter(term != "(Intercept)")
saveRDS(lm_variables, file = "../../results/resulttable_lm.rds")
lm_variables2 = bind_rows(lm_log_HF183, lm_log_HuMm2, lm_log_LeeSg, lm_log_CG01)
lm_variables2 <- lm_variables2
png("../../results/resulttable_lm_3.png", height=400, width=400)
p<-tableGrob(lm_variables2)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
lm_variables <- lm_variables
png("../../results/resulttable_lm_2.png", height=400, width=400)
p<-tableGrob(lm_variables)
grid.arrange(p)
dev.off()
## quartz_off_screen
## 2
#p value is showing if that level within the facotr that you testing is a signifncat predictor our ooutcome
#if it less than .05 is has a signficiant affect on our outcome
#HF183, HumM2 and LeeSg are all signficant predictors